library(grf)
library(foreign)
library(haven)
library(rio)
library(party)
library(mltools)
library(mlr)
library(data.table)
library(dplyr)
library(labelled)
library(caTools)
library(xgboost)
library(caret)
library(tree)
library(datasets)
require(haven)
library(Matrix)
#library(Seurat)
library(gtools)
library(binsreg)

# Deliberately no `rm(list = ls())` here: it would wipe the caller's workspace
# whenever this script is sourced, yet it neither detaches packages nor resets
# options, so it never produced a truly clean state. Run the script in a fresh
# R session instead.


### Untreated ###
# NOTE(review): the header says "Untreated" but the file read below is
# "MLSevdata_Treat.dta" -- confirm which sample the model should be fit on.

# Training data for the classifier; the outcome is the column `sev`.
train <- rio::import("I:/Workdata/706727/Build/Data/MLSevdata_Treat.dta")

# Feature matrix: every column except pnr, year and the outcome `sev`.
# `dplyr::` is spelled out because many packages are attached above and any of
# them could mask `select`.
X <- as.matrix(dplyr::select(train, -pnr, -year, -sev))
Y <- as.factor(train$sev)

# The rest of the script fits a "binary:logistic" model and recodes the factor
# to 0/1, which is only valid when the outcome has exactly two classes.
if (nlevels(Y) != 2L) {
  stop("`sev` must have exactly two classes, found ", nlevels(Y),
       call. = FALSE)
}

# Seed the RNG before the resampling and the random search below.
set.seed(112)

# mlr xgboost classifier that returns class probabilities.
lrn <- makeLearner("classif.xgboost", predict.type = "prob")
# Settings held fixed for every candidate evaluated during tuning.
# NOTE(review): tuning uses eta = 1 with 100 rounds, whereas the final model
# further down is fit with eta = 0.01 -- presumably to keep the search cheap;
# confirm that the tuned values are meant to carry over.
lrn$par.vals <- list(
  objective = "binary:logistic",
  eval_metric = "auc",
  nrounds = 100L,
  nthread = 16,
  eta = 1
)

# Search space for the random search.
gridparams <- makeParamSet(
  makeIntegerParam("max_depth", lower = 1L, upper = 10L),
  makeNumericParam("subsample", lower = 0.25, upper = 1),
  makeNumericParam("colsample_bytree", lower = 0.25, upper = 1),
  makeNumericParam("gamma", lower = 0, upper = 10)
)

# mlr task built from the feature matrix plus the factor target.
dataframetrain <- as.data.frame(X)
dataframetrain$Y <- Y
traintask <- makeClassifTask(data = dataframetrain, target = "Y")

# Stratified 5-fold CV, 20 random draws from the search space, scored by AUC.
# TRUE is written out because `T` is an ordinary variable that can be
# reassigned; `mlr::auc` is qualified so another package cannot mask it.
rdesc <- makeResampleDesc("CV", stratify = TRUE, iters = 5L)
ctrl <- makeTuneControlRandom(maxit = 20L)
mytune <- tuneParams(
  learner = lrn,
  task = traintask,
  resampling = rdesc,
  measures = mlr::auc,
  par.set = gridparams,
  control = ctrl,
  show.info = TRUE
)

# Fixed settings for the final model.
xgb_params <- list(
  booster = "gbtree",
  eta = 0.01,
  nthread = 16,
  objective = "binary:logistic",
  eval_metric = "auc"
)

# Add the hyperparameter values selected by the tuning step (`mytune$x`).
xgb_params <- append(xgb_params, mytune$x)

# xgboost needs numeric 0/1 labels. Going through as.factor() keeps this line
# safe to re-run: a plain `as.numeric(Y) - 1` turns an already recoded 0/1
# vector into -1/0 on a second execution.
Y <- as.numeric(as.factor(Y)) - 1
xgb_train <- xgb.DMatrix(data = X, label = Y)

# Number of rounds #
# 5-fold CV with early stopping chooses the number of boosting rounds.
xgbcv <- xgb.cv(
  params = xgb_params,
  data = xgb_train,
  nrounds = 5000,
  early_stopping_rounds = 5,
  nfold = 5
)
numrounds <- xgbcv$best_iteration
if (is.null(numrounds)) {
  # Some xgboost releases report the value under `early_stop` instead --
  # TODO confirm against the installed version.
  numrounds <- xgbcv$early_stop$best_iteration
}
if (is.null(numrounds)) {
  stop("xgb.cv() did not report a best iteration", call. = FALSE)
}
model <- xgb.train(
  params = xgb_params,
  data = xgb_train,
  nrounds = numrounds,
  verbose = 1
)

# List the features used by the fitted model. print() is explicit because a
# bare expression is not auto-printed when the script is run via source().
imp <- xgb.importance(model = model)
print(imp$Feature)

## Predict in test sample ##
full <- rio::import("I:/Workdata/706727/Build/Data/MLSevdata_Full.dta")

# Build the prediction matrix from exactly the training features, in the
# training column order. Dropping only "columns that are not in train" would
# keep `sev` whenever the full data contains it, and would follow the column
# order of `full` rather than that of the matrix the model was fit on.
feature_cols <- colnames(X)
missing_cols <- setdiff(feature_cols, colnames(full))
if (length(missing_cols) > 0) {
  stop("Prediction data lacks training features: ",
       paste(missing_cols, collapse = ", "), call. = FALSE)
}
Xtest <- as.matrix(as.data.frame(full)[, feature_cols, drop = FALSE])

# One predicted probability per row. `reshape` is omitted: it only affects
# predictions with several values per row, which a binary objective never has.
pred <- predict(model, Xtest)

# Attach the predictions to the pnr/year columns and write them out.
exp <- cbind(dplyr::select(full, pnr, year), pred)

rio::export(exp, "I:/Workdata/706727/Build/Data/MLSevdata_Pred.dta")


